#Building Maps
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.4     ✓ dplyr   1.0.2
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(maps)
##
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
##
## map
library(mapdata)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(viridis)
## Loading required package: viridisLite
library(wesanderson)
# Load the JHU CSSE daily report dated 04-02-2020 and expose the
# longitude column under the name `Long` instead of `Long_`.
daily_report <- rename(
  read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/04-02-2020.csv")),
  Long = "Long_"
)
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## FIPS = col_double(),
## Admin2 = col_character(),
## Province_State = col_character(),
## Country_Region = col_character(),
## Last_Update = col_character(),
## Lat = col_double(),
## Long_ = col_double(),
## Confirmed = col_double(),
## Deaths = col_double(),
## Recovered = col_double(),
## Active = col_double(),
## Combined_Key = col_character()
## )
# World bubble map: one point per report row, sized by confirmed cases in
# thousands, drawn over a grey world basemap without country outlines.
ggplot(daily_report, aes(x = Long, y = Lat, size = Confirmed / 1000)) +
  borders("world", colour = NA, fill = "grey90") +
  theme_bw() +
  geom_point(shape = 21, color = "purple", fill = "purple", alpha = 0.5) +
  labs(
    title = "World COVID-19 Confirmed cases", x = "", y = "",
    # Legend title previously ended in an unbalanced ")".
    size = "Cases (x1000)"
  ) +
  theme(legend.position = "right") +
  coord_fixed(ratio = 1.5)
## Warning: Removed 54 rows containing missing values (geom_point).
# Load the 04-05-2020 daily report and keep only US rows that are not in the
# listed Province_State values and that have a positive latitude.
daily_report <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/04-05-2020.csv")) %>%
  rename(Long = "Long_") %>%
  filter(
    Country_Region == "US",
    !Province_State %in% c(
      "Alaska", "Hawaii", "American Samoa",
      "Puerto Rico", "Northern Mariana Islands",
      "Virgin Islands", "Recovered", "Guam", "Grand Princess",
      "District of Columbia", "Diamond Princess"
    ),
    Lat > 0
  )
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## FIPS = col_double(),
## Admin2 = col_character(),
## Province_State = col_character(),
## Country_Region = col_character(),
## Last_Update = col_datetime(format = ""),
## Lat = col_double(),
## Long_ = col_double(),
## Confirmed = col_double(),
## Deaths = col_double(),
## Recovered = col_double(),
## Active = col_double(),
## Combined_Key = col_character()
## )
# US bubble map: points sized by confirmed cases over the "state" basemap
# with black state outlines.
ggplot(daily_report, aes(x = Long, y = Lat, size = Confirmed)) +
  borders("state", colour = "black", fill = "grey90") +
  theme_bw() +
  geom_point(shape = 21, color = "purple", fill = "purple", alpha = 0.5) +
  labs(
    title = "COVID-19 Confirmed Cases in the US",
    x = "",
    y = "",
    size = "Cases"
  ) +
  theme(legend.position = "right") +
  coord_fixed(ratio = 1.5)
# Legend breaks and their labels for the log-scaled size and colour scales.
# The last break used to repeat 10000, which put the "50,000+" label on the
# same value as the previous bin; it is now 50000 and the preceding label is
# narrowed to match.
mybreaks <- c(1, 100, 1000, 10000, 50000)
mylabels <- c("1-99", "100-999", "1,000-9,999", "10,000-49,999", "50,000+")

# US bubble map with point size and colour both mapped to confirmed cases on
# a log scale. Rows with Confirmed == 0 become -Inf under the log transform
# and are dropped by ggplot2 with a warning.
ggplot(daily_report, aes(x = Long, y = Lat, size = Confirmed)) +
  borders("state", colour = "white", fill = "grey90") +
  # x, y and size are inherited from the plot; only colour is added here.
  geom_point(aes(color = Confirmed), stroke = 0, alpha = 0.7) +
  scale_size_continuous(
    name = "Cases", trans = "log", range = c(1, 7),
    breaks = mybreaks, labels = mylabels
  ) +
  scale_color_viridis_c(
    option = "viridis", name = "Cases",
    trans = "log", breaks = mybreaks, labels = mylabels
  ) +
  # Cleaning up the graph
  theme_void() +
  # Use a legend rather than a colourbar for colour (identical name, breaks
  # and labels on both scales let ggplot2 combine the two legends).
  guides(colour = guide_legend()) +
  labs(title = "Anisa Dhana's layout for COVID-19 Confirmed Cases in the US") +
  theme(
    legend.position = "bottom",
    text = element_text(color = "#22211d"),
    plot.background = element_rect(fill = "#ffffff", color = NA),
    panel.background = element_rect(fill = "#ffffff", color = NA),
    legend.background = element_rect(fill = "#ffffff", color = NA)
  ) +
  coord_fixed(ratio = 1.5)
## Warning: Transformation introduced infinite values in discrete y-axis
## Warning: Transformation introduced infinite values in discrete y-axis
## Warning in sqrt(x): NaNs produced
## Warning: Removed 40 rows containing missing values (geom_point).
#Mapping Data to shapes
# Load the 04-02-2020 daily report, keep US rows, and total confirmed cases
# per Province_State. State names are lower-cased afterwards so they can be
# joined against the lower-case `region` column of the map data.
daily_report <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/04-02-2020.csv")) %>%
  rename(Long = "Long_") %>%
  filter(Country_Region == "US") %>%
  group_by(Province_State) %>%
  summarize(Confirmed = sum(Confirmed)) %>%
  mutate(Province_State = tolower(Province_State))
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## FIPS = col_double(),
## Admin2 = col_character(),
## Province_State = col_character(),
## Country_Region = col_character(),
## Last_Update = col_character(),
## Lat = col_double(),
## Long_ = col_double(),
## Confirmed = col_double(),
## Deaths = col_double(),
## Recovered = col_double(),
## Active = col_double(),
## Combined_Key = col_character()
## )
## `summarise()` ungrouping output (override with `.groups` argument)
# Load the US state polygons.
us <- map_data("state")
# Attach each state's confirmed count to its polygon rows; the map's `region`
# column is matched against the report's `Province_State`.
state_join <- left_join(us, daily_report, by = c("region" = "Province_State"))

# Using R Color Palettes
# State choropleth: fill by confirmed cases on a log10 scale using the
# wesanderson "Zissou1" palette.
ggplot(data = us, mapping = aes(x = long, y = lat, group = group)) +
  coord_fixed(1.3) +
  # Add data layer
  geom_polygon(data = state_join, aes(fill = Confirmed), color = "black") +
  scale_fill_gradientn(
    colours = wes_palette("Zissou1", 100, type = "continuous"),
    trans = "log10"
  ) +
  # Title previously ended with a stray apostrophe.
  labs(title = "COVID-19 Confirmed Cases in the US")
library(RColorBrewer)
# To display only colorblind-friendly brewer palettes, specify the option colorblindFriendly = TRUE as follow:
# display.brewer.all(colorblindFriendly = TRUE)
# Get and format the covid report data
# Build a per-county table of confirmed cases keyed as "<admin2>.<state>".
# NOTE: the object name says 03_27, but the file read here is the 04-02-2020
# report; the name is kept because the join further down refers to it.
report_03_27_2020 <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/04-02-2020.csv")) %>%
  rename(Long = "Long_") %>%
  # Combine Admin2 and Province_State into one join key.
  unite(Key, Admin2, Province_State, sep = ".") %>%
  group_by(Key) %>%
  summarize(Confirmed = sum(Confirmed)) %>%
  # Lower-case the key so it lines up with the key built from the map data.
  mutate(Key = tolower(Key))
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## FIPS = col_double(),
## Admin2 = col_character(),
## Province_State = col_character(),
## Country_Region = col_character(),
## Last_Update = col_character(),
## Lat = col_double(),
## Long_ = col_double(),
## Confirmed = col_double(),
## Deaths = col_double(),
## Recovered = col_double(),
## Active = col_double(),
## Combined_Key = col_character()
## )
## `summarise()` ungrouping output (override with `.groups` argument)
# dim(report_03_27_2020)
# Map data: state polygons, plus county polygons carrying a
# "<subregion>.<region>" key (the original two columns are kept).
us <- map_data("state")
counties <- unite(
  map_data("county"),
  Key, subregion, region,
  sep = ".", remove = FALSE
)
# Attach the per-county confirmed counts to the county polygons.
state_join <- counties %>%
  left_join(report_03_27_2020, by = "Key")
# sum(is.na(state_join$Confirmed))
# County choropleth of confirmed cases on a log10 colour scale.
ggplot(data = us, mapping = aes(x = long, y = lat, group = group)) +
  coord_fixed(1.3) +
  # Add data layer: county fills; counties with no usable value are white.
  geom_polygon(data = state_join, aes(fill = Confirmed)) +
  # State outlines are added after the county fills. They were previously
  # added first, so the filled county polygons were painted over them.
  borders("state", colour = "black") +
  scale_fill_gradientn(
    colors = brewer.pal(n = 5, name = "PuRd"),
    breaks = c(1, 10, 100, 1000, 10000, 100000),
    trans = "log10", na.value = "White"
  ) +
  ggtitle("Number of Confirmed Cases by US County") +
  theme_bw()
## Warning: Transformation introduced infinite values in discrete y-axis
# Load the 04-02-2020 daily report, keep the Massachusetts rows, and total
# confirmed cases per Admin2 value; names are lower-cased for the map join.
daily_report <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/04-02-2020.csv")) %>%
  rename(Long = "Long_") %>%
  filter(Province_State == "Massachusetts") %>%
  group_by(Admin2) %>%
  summarize(Confirmed = sum(Confirmed)) %>%
  mutate(Admin2 = tolower(Admin2))
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## FIPS = col_double(),
## Admin2 = col_character(),
## Province_State = col_character(),
## Country_Region = col_character(),
## Last_Update = col_character(),
## Lat = col_double(),
## Long_ = col_double(),
## Confirmed = col_double(),
## Deaths = col_double(),
## Recovered = col_double(),
## Active = col_double(),
## Combined_Key = col_character()
## )
## `summarise()` ungrouping output (override with `.groups` argument)
# State- and county-level polygons restricted to Massachusetts.
us <- map_data("state")
ma_us <- subset(us, region == "massachusetts")
counties <- map_data("county")
ma_county <- subset(counties, region == "massachusetts")
# Attach confirmed counts to the county polygons (subregion <-> Admin2).
# NOTE(review): the report contains a combined "dukes and nantucket" row and
# an "unassigned" row; presumably neither matches a county subregion in the
# map data -- confirm.
state_join <- ma_county %>%
  left_join(daily_report, by = c("subregion" = "Admin2"))
# Massachusetts county choropleth: fill by confirmed cases on a log10 scale
# using the ColorBrewer "BuGn" palette.
ggplot(data = ma_county, mapping = aes(x = long, y = lat, group = group)) +
  coord_fixed(1.3) +
  # Add data layer
  geom_polygon(data = state_join, aes(fill = Confirmed), color = "white") +
  scale_fill_gradientn(
    colors = brewer.pal(n = 5, name = "BuGn"),
    trans = "log10"
  ) +
  # Title previously ended with a stray apostrophe.
  labs(title = "COVID-19 Confirmed Cases in Massachusetts")
# Print the per-county table that feeds the map.
daily_report
## # A tibble: 14 x 2
## Admin2 Confirmed
## <chr> <dbl>
## 1 barnstable 283
## 2 berkshire 213
## 3 bristol 424
## 4 dukes and nantucket 12
## 5 essex 1039
## 6 franklin 85
## 7 hampden 546
## 8 hampshire 102
## 9 middlesex 1870
## 10 norfolk 938
## 11 plymouth 621
## 12 suffolk 1896
## 13 unassigned 270
## 14 worcester 667
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Interactive (plotly) version of the Massachusetts county map, with panel
# border/background and axis ticks/text blanked out.
ggplotly(
  ggplot(data = ma_county, mapping = aes(x = long, y = lat, group = group)) +
    coord_fixed(1.3) +
    # Add data layer
    geom_polygon(data = state_join, aes(fill = Confirmed), color = "black") +
    scale_fill_gradientn(
      colours = wes_palette("Zissou1", 100, type = "continuous")
    ) +
    ggtitle("COVID-19 Cases in MA") +
    # Cleaning up the graph
    labs(x = NULL, y = NULL) +
    theme(
      panel.border = element_blank(),
      panel.background = element_blank(),
      axis.ticks = element_blank(),
      axis.text = element_blank()
    )
)
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
# Read the 09-26-2020 daily report and total confirmed cases and deaths
# per Country_Region.
daily_report <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/09-26-2020.csv")) %>%
  rename(Long = "Long_") %>%
  group_by(Country_Region) %>%
  summarize(
    Confirmed = sum(Confirmed),
    Deaths = sum(Deaths)
  )
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## FIPS = col_double(),
## Admin2 = col_character(),
## Province_State = col_character(),
## Country_Region = col_character(),
## Last_Update = col_datetime(format = ""),
## Lat = col_double(),
## Long_ = col_double(),
## Confirmed = col_double(),
## Deaths = col_double(),
## Recovered = col_double(),
## Active = col_double(),
## Combined_Key = col_character(),
## Incidence_Rate = col_double(),
## `Case-Fatality_Ratio` = col_double()
## )
## `summarise()` ungrouping output (override with `.groups` argument)
# World polygons as a tibble.
world <- map_data("world") %>%
  as_tibble()
# List the map regions that have no identically named Country_Region in the
# report, i.e. names that will not match in a join.
setdiff(world$region, daily_report$Country_Region)
## [1] "Aruba" "Anguilla"
## [3] "American Samoa" "Antarctica"
## [5] "French Southern and Antarctic Lands" "Antigua"
## [7] "Barbuda" "Saint Barthelemy"
## [9] "Bermuda" "Ivory Coast"
## [11] "Democratic Republic of the Congo" "Republic of Congo"
## [13] "Cook Islands" "Cape Verde"
## [15] "Curacao" "Cayman Islands"
## [17] "Czech Republic" "Canary Islands"
## [19] "Falkland Islands" "Reunion"
## [21] "Mayotte" "French Guiana"
## [23] "Martinique" "Guadeloupe"
## [25] "Faroe Islands" "Micronesia"
## [27] "UK" "Guernsey"
## [29] "Greenland" "Guam"
## [31] "Heard Island" "Isle of Man"
## [33] "Cocos Islands" "Christmas Island"
## [35] "Chagos Archipelago" "Jersey"
## [37] "Siachen Glacier" "Kiribati"
## [39] "Nevis" "Saint Kitts"
## [41] "South Korea" "Saint Martin"
## [43] "Marshall Islands" "Macedonia"
## [45] "Myanmar" "Northern Mariana Islands"
## [47] "Montserrat" "New Caledonia"
## [49] "Norfolk Island" "Niue"
## [51] "Bonaire" "Sint Eustatius"
## [53] "Saba" "Nauru"
## [55] "Pitcairn Islands" "Palau"
## [57] "Puerto Rico" "North Korea"
## [59] "Madeira Islands" "Azores"
## [61] "Palestine" "French Polynesia"
## [63] "South Sandwich Islands" "South Georgia"
## [65] "Saint Helena" "Ascension Island"
## [67] "Solomon Islands" "Saint Pierre and Miquelon"
## [69] "Swaziland" "Sint Maarten"
## [71] "Turks and Caicos Islands" "Turkmenistan"
## [73] "Tonga" "Trinidad"
## [75] "Tobago" "Taiwan"
## [77] "USA" "Vatican"
## [79] "Grenadines" "Saint Vincent"
## [81] "Virgin Islands" "Vanuatu"
## [83] "Wallis and Futuna"
# Many of these countries are considered states or territories in the JHU
# covid reports, but let's fix a few of them.
# recode() replaces whole values by exact match. The previous
# str_replace_all() call treated each name as an unanchored regular
# expression and applied the replacements one after another, so it could
# also rewrite part of a longer region name or the output of an earlier
# replacement.
world <- as_tibble(map_data("world")) %>%
  mutate(region = recode(
    region,
    "USA" = "US",
    "Czech Republic" = "Czechia",
    "Ivory Coast" = "Cote d'Ivoire",
    "Democratic Republic of the Congo" = "Congo (Kinshasa)",
    "Republic of Congo" = "Congo (Brazzaville)"
  ))
# Attach the per-country totals to the world polygons
# (map `region` <-> report `Country_Region`).
country_join <- world %>%
  left_join(daily_report, by = c("region" = "Country_Region"))
# Interactive world map filled by deaths; `text = region` is passed as an
# extra aesthetic on the base plot.
ggplotly(
  ggplot(
    data = world,
    mapping = aes(x = long, y = lat, text = region, group = group)
  ) +
    coord_fixed(1.3) +
    # Add data layer
    geom_polygon(data = country_join, aes(fill = Deaths), color = "black") +
    scale_fill_gradientn(
      colours = wes_palette("Zissou1", 100, type = "continuous")
    ) +
    labs(title = "COVID-19 Deaths")
)
# Exercise 1
# Load the 09-26-2020 daily report with the longitude column renamed
# from `Long_` to `Long`.
daily_report_1 <- rename(
  read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/09-26-2020.csv")),
  Long = "Long_"
)
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## FIPS = col_double(),
## Admin2 = col_character(),
## Province_State = col_character(),
## Country_Region = col_character(),
## Last_Update = col_datetime(format = ""),
## Lat = col_double(),
## Long_ = col_double(),
## Confirmed = col_double(),
## Deaths = col_double(),
## Recovered = col_double(),
## Active = col_double(),
## Combined_Key = col_character(),
## Incidence_Rate = col_double(),
## `Case-Fatality_Ratio` = col_double()
## )
# Exercise world bubble map: points sized by confirmed cases in thousands
# over a grey basemap with pink country outlines.
ggplot(daily_report_1, aes(x = Long, y = Lat, size = Confirmed / 1000)) +
  borders("world", colour = "pink", fill = "grey") +
  theme_minimal() +
  geom_point(shape = 21, color = "gold", fill = "magenta", alpha = 0.5) +
  labs(
    title = "World COVID-19 Confirmed cases", x = "", y = "",
    # Legend title previously ended in an unbalanced ")".
    size = "Cases (x1000)"
  ) +
  theme(legend.position = "right") +
  coord_fixed(ratio = 1.5)
## Warning: Removed 82 rows containing missing values (geom_point).
# Load the 09-26-2020 daily report and keep only US rows that are not in the
# listed Province_State values and that have a positive latitude.
daily_report_2 <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/09-26-2020.csv")) %>%
  rename(Long = "Long_") %>%
  filter(
    Country_Region == "US",
    !Province_State %in% c(
      "Alaska", "Hawaii", "American Samoa",
      "Puerto Rico", "Northern Mariana Islands",
      "Virgin Islands", "Recovered", "Guam", "Grand Princess",
      "District of Columbia", "Diamond Princess"
    ),
    Lat > 0
  )
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## FIPS = col_double(),
## Admin2 = col_character(),
## Province_State = col_character(),
## Country_Region = col_character(),
## Last_Update = col_datetime(format = ""),
## Lat = col_double(),
## Long_ = col_double(),
## Confirmed = col_double(),
## Deaths = col_double(),
## Recovered = col_double(),
## Active = col_double(),
## Combined_Key = col_character(),
## Incidence_Rate = col_double(),
## `Case-Fatality_Ratio` = col_double()
## )
### Size range changed from c(1, 7) to c(1, 5) for better visualization
# Legend breaks and their labels, shared by the size and colour scales.
mybreaks <- c(100, 1000, 10000, 100000, 1000000)
mylabels <- c(
  "100-999", "1,000-9,999", "10,000-99,999",
  "100,000-999,999", "1,000,000-9,999,999"
)

# US bubble map with point size and colour both mapped to confirmed cases on
# a log scale. Rows with Confirmed == 0 become -Inf under the log transform
# and are dropped by ggplot2 with a warning.
ggplot(daily_report_2, aes(x = Long, y = Lat, size = Confirmed)) +
  borders("state", colour = "white", fill = "grey90") +
  # x, y and size are inherited from the plot; only colour is added here.
  # `stroke` is numeric, so 0 is spelled out instead of the reassignable `F`.
  geom_point(aes(color = Confirmed), stroke = 0, alpha = 0.7) +
  scale_size_continuous(
    name = "Cases", trans = "log", range = c(1, 5),
    breaks = mybreaks, labels = mylabels
  ) +
  scale_color_viridis_c(
    option = "viridis", name = "Cases",
    trans = "log", breaks = mybreaks, labels = mylabels
  ) +
  # Cleaning up the graph; title updated for the report date (9/26/20)
  theme_void() +
  guides(colour = guide_legend()) +
  labs(title = "Anisa Dhana's layout for COVID-19 Confirmed Cases in the US (9/26/20)") +
  theme(
    legend.position = "bottom",
    text = element_text(color = "#22211d"),
    plot.background = element_rect(fill = "#ffffff", color = NA),
    panel.background = element_rect(fill = "#ffffff", color = NA),
    legend.background = element_rect(fill = "#ffffff", color = NA)
  ) +
  coord_fixed(ratio = 1.5)
## Warning: Transformation introduced infinite values in discrete y-axis
## Warning: Transformation introduced infinite values in discrete y-axis
## Warning in sqrt(x): NaNs produced
## Warning: Removed 6 rows containing missing values (geom_point).
#Exercise 3
# Build a per-county table of confirmed cases from the 09-26-2020 report,
# keyed as "<admin2>.<state>" in lower case.
report_09_26_2020 <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/09-26-2020.csv")) %>%
  rename(Long = "Long_") %>%
  # Combine Admin2 and Province_State into one join key.
  unite(Key, Admin2, Province_State, sep = ".") %>%
  group_by(Key) %>%
  summarize(Confirmed = sum(Confirmed)) %>%
  # Lower-case the key so it lines up with the key built from the map data.
  mutate(Key = tolower(Key))
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## FIPS = col_double(),
## Admin2 = col_character(),
## Province_State = col_character(),
## Country_Region = col_character(),
## Last_Update = col_datetime(format = ""),
## Lat = col_double(),
## Long_ = col_double(),
## Confirmed = col_double(),
## Deaths = col_double(),
## Recovered = col_double(),
## Active = col_double(),
## Combined_Key = col_character(),
## Incidence_Rate = col_double(),
## `Case-Fatality_Ratio` = col_double()
## )
## `summarise()` ungrouping output (override with `.groups` argument)
# dim(report_09_26_2020)
# Map data: state polygons, plus county polygons carrying a
# "<subregion>.<region>" key (the original two columns are kept).
us <- map_data("state")
counties <- unite(
  map_data("county"),
  Key, subregion, region,
  sep = ".", remove = FALSE
)
# Attach the per-county confirmed counts to the county polygons.
state_join <- counties %>%
  left_join(report_09_26_2020, by = "Key")
# sum(is.na(state_join$Confirmed))
# County choropleth of confirmed cases on a log10 colour scale.
ggplot(data = us, mapping = aes(x = long, y = lat, group = group)) +
  coord_fixed(1.3) +
  # Add data layer: county fills; counties with no usable value are white.
  geom_polygon(data = state_join, aes(fill = Confirmed)) +
  # State outlines are added after the county fills. They were previously
  # added first, so the filled county polygons were painted over them.
  borders("state", colour = "black") +
  scale_fill_gradientn(
    colors = brewer.pal(n = 5, name = "Pastel1"),
    breaks = c(1, 10, 100, 1000, 10000, 100000),
    trans = "log10", na.value = "White"
  ) +
  ggtitle("Number of Confirmed Cases by US County") +
  theme_minimal()
## Warning: Transformation introduced infinite values in discrete y-axis
# Make an interactive plot using a state of your choosing, with a theme different from those used in the above examples.
# Changed Massachusetts to New York
# Load the 09-26-2020 daily report, keep the New York rows, and total
# confirmed cases per Admin2 value; names are lower-cased for the map join.
daily_report_4 <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/09-26-2020.csv")) %>%
  rename(Long = "Long_") %>%
  filter(Province_State == "New York") %>%
  group_by(Admin2) %>%
  summarize(Confirmed = sum(Confirmed)) %>%
  mutate(Admin2 = tolower(Admin2))
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## FIPS = col_double(),
## Admin2 = col_character(),
## Province_State = col_character(),
## Country_Region = col_character(),
## Last_Update = col_datetime(format = ""),
## Lat = col_double(),
## Long_ = col_double(),
## Confirmed = col_double(),
## Deaths = col_double(),
## Recovered = col_double(),
## Active = col_double(),
## Combined_Key = col_character(),
## Incidence_Rate = col_double(),
## `Case-Fatality_Ratio` = col_double()
## )
## `summarise()` ungrouping output (override with `.groups` argument)
# State- and county-level polygons restricted to New York.
US4 <- map_data("state")
ny_us <- subset(US4, region == "new york")
counties <- map_data("county")
ny_county <- subset(counties, region == "new york")
# Attach confirmed counts to the county polygons (subregion <-> Admin2).
state_join_ex4 <- ny_county %>%
  left_join(daily_report_4, by = c("subregion" = "Admin2"))
# Interactive (plotly) New York county map filled by confirmed cases with the
# viridis "plasma" palette; axis lines, text, ticks and titles are blanked.
ggplotly(
  ggplot(data = ny_county, mapping = aes(x = long, y = lat, group = group)) +
    coord_fixed(1.3) +
    # Add data layer
    geom_polygon(
      data = state_join_ex4,
      aes(fill = Confirmed),
      color = "white"
    ) +
    ggtitle("COVID-19 Cases in NY (9/26/20)") +
    theme(
      axis.line = element_blank(),
      axis.text = element_blank(),
      axis.ticks = element_blank(),
      axis.title = element_blank()
    ) +
    scale_fill_viridis(option = "plasma")
)
Interactive and Static Graphs found here
Application written in R (R Core Team 2015) using the data collected from the Novel Coronavirus repository (Dong, Du, and Gardner 2020).
Dong, E., H. Du, and L. Gardner. 2020. “An Interactive Web-Based Dashboard to Track COVID-19 in Real Time.” Article. https://doi.org/10.1016/S1473-3099(20)30120-1.
R Core Team. 2015. “R: A Language and Environment for Statistical Computing.” Journal Article. http://www.R-project.org.